library(haven) # 读取NHANES XPT文件的包
library(plyr) # 用于数据处理
library(dplyr) # 用于数据处理
library(arsenal) # 用于数据快速预览
library(survey) # 用于加权情况下的分析


# Set the working directory; the relative paths used below are resolved
# against it.
setwd("D:\\NHANES DATA")

# DEMO - demographics data. Going by the directory names, demo_d is the
# 2005-2006 cycle and demo_e is the 2007-2008 cycle.
demo_d <- read_xpt("2005-2006\\Demographics\\demo_d.xpt")
demo_e <- read_xpt("2007-2008\\Demographics\\demo_e.xpt")

# Optional quick summaries of each cycle (left disabled):
# tab_demo_d <- tableby(~RIDAGEYR + factor(RIAGENDR) + RIDSTATR, data = demo_d)
# summary(tab_demo_d, text=TRUE)
# tab_demo_e <- tableby(~RIDAGEYR + factor(RIAGENDR) + RIDSTATR, data = demo_e)
# summary(tab_demo_e, text=TRUE)

# Stack the two cycles row-wise into a single table.
demo_data_file <- dplyr::bind_rows(list(demo_d, demo_e))

# View() opens a data viewer window, so only call it in an interactive
# session; a batch run (e.g. Rscript) then skips the previews.
if (interactive()) {
  View(demo_d)
  View(demo_e)
  View(demo_data_file)
}

# Extract the SEQN column on its own and dump it to a CSV file in the
# working directory: comma-separated, header row included, no row names,
# character values quoted (write.table default).
data <- demo_data_file[, "SEQN"]
write.table(
  x = data,
  file = "aaaaaaa.csv",
  sep = ",",
  row.names = FALSE
)



# dim(demo_data_file) # [1] 20497    47
# Keep only SEQN plus the age and sex columns.
#   RIDAGEYR - age at screening (recoded): 0 to 84 = range of values;
#              85 = 85 years or older
#   RIAGENDR - sex: 1 = male; 2 = female
demo_data <- demo_data_file[, c("SEQN", "RIDAGEYR", "RIAGENDR")]

# View() opens a data viewer window, so only call it in an interactive
# session; a batch run (e.g. Rscript) then skips the preview.
if (interactive()) {
  View(demo_data)
}
# BMX - body measures files from the 2005-2006 and 2007-2008 directories.
bmx_d <- read_xpt("2005-2006\\Examination\\bmx_d.xpt")
bmx_e <- read_xpt("2007-2008\\Examination\\bmx_e.xpt")

# Stack both cycles row-wise, then keep SEQN and the BMI column.
#   BMXBMI - body mass index (kg/m**2); 11.74 to 130.21 = range of values
bmx_data_file <- dplyr::bind_rows(bmx_d, bmx_e)
bmx_keep_cols <- c("SEQN", "BMXBMI")
bmx_data <- bmx_data_file[, bmx_keep_cols]
# dim(bmx_data) # [1] 19712     2
# CRP - laboratory files from the 2005-2006 and 2007-2008 directories,
# stacked row-wise into one table.
crp_d <- read_xpt("2005-2006\\Laboratory\\crp_d.xpt")
crp_e <- read_xpt("2007-2008\\Laboratory\\crp_e.xpt")
crp_data_file <- dplyr::bind_rows(crp_d, crp_e)

# Report rows x columns of the combined table.
# Output recorded by the original author: [1] 18152     2
dim(crp_data_file)
